{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "## 6.1.3 梯度消失和梯度爆炸的原因"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2023-05-09T07:09:23.284159800Z",
     "start_time": "2023-05-09T07:09:22.685160500Z"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [],
   "source": [
    "N = 2  # mini-batch的大小\n",
    "H = 3  # 隐藏状态向量的维数\n",
    "T = 20  # 时序数据的长度"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-05-09T07:09:24.868872400Z",
     "start_time": "2023-05-09T07:09:24.861878100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [],
   "source": [
    "dh = np.ones((N, H))\n",
    "np.random.seed(3)  # 为了复现，固定随机数种子\n",
    "Wh = np.random.randn(H, H)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-05-09T07:09:36.020909700Z",
     "start_time": "2023-05-09T07:09:36.005911500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [],
   "source": [
    "norm_list = []\n",
    "for t in range(T):\n",
    "    dh = np.dot(dh, Wh.T)\n",
    "    norm = np.sqrt(np.sum(dh ** 2)) / N\n",
    "    norm_list.append(norm)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-05-09T07:37:24.518627900Z",
     "start_time": "2023-05-09T07:37:24.513634100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 梯度裁剪"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [],
   "source": [
    "dW1 = np.random.rand(3, 3) * 10\n",
    "dW2 = np.random.rand(3, 3) * 10\n",
    "grads = [dW1, dW2]\n",
    "max_norm = 5.0"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-05-09T07:47:54.373321800Z",
     "start_time": "2023-05-09T07:47:54.359322600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [],
   "source": [
    "def clip_grads(grads, max_norm):\n",
    "    total_norm = 0\n",
    "    for grad in grads:\n",
    "        total_norm += np.sum(grad ** 2)\n",
    "    total_norm = np.sqrt(total_norm)\n",
    "\n",
    "    rate = max_norm / (total_norm + 1e-6)\n",
    "    if rate < 1:\n",
    "        for grad in grads:\n",
    "            grad *= rate\n",
    "\n",
    "clip_grads(grads,max_norm)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-05-09T07:54:29.178190400Z",
     "start_time": "2023-05-09T07:54:29.170191900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
